In [1]:
# Move to the repository root so relative paths (source/, artifacts/) resolve.
cd ../..
/Users/shanekercheval/repos/data-science-template
In [2]:
# Identifies which experiment run's artifacts this notebook analyzes.
timestamp = '2022_06_05_12_33_24'
In [3]:
%run "source/config/notebook_settings.py"
from source.library.utilities import Timer, log_info, get_config

config = get_config()
# Directory holding all artifacts saved by the experiment run selected via `timestamp`.
experiment_directory = os.path.join(config['EXPERIMENTS']['DIRECTORY'], f"experiment__{timestamp}")
log_info(f"Experiment Directory: {experiment_directory}")

def file_path(file_name):
    """Return the path of `file_name` inside the current experiment directory."""
    return os.path.join(experiment_directory, file_name)
2022-06-05 12:33:48 - INFO     | Experiment Directory: artifacts/models/experiments/experiment__2022_06_05_12_33_24

Load Data

In [4]:
with Timer("Loading training/test datasets"):
    # Fix: `pd.pandas.read_pickle` worked only because pandas re-exports itself
    # as an attribute; the idiomatic call is `pd.read_pickle`.
    X_train = pd.read_pickle(file_path('x_train.pkl'))
    X_test = pd.read_pickle(file_path('x_test.pkl'))
    y_train = pd.read_pickle(file_path('y_train.pkl'))
    y_test = pd.read_pickle(file_path('y_test.pkl'))
2022-06-05 12:33:48 - INFO     | *****Timer Started: Loading training/test datasets
2022-06-05 12:33:48 - INFO     | *****Timer Finished (0.00 seconds)
In [5]:
# Sanity check: feature matrices and label vectors should have matching row counts.
log_info(X_train.shape)
log_info(len(y_train))

log_info(X_test.shape)
log_info(len(y_test))
2022-06-05 12:33:48 - INFO     | (800, 20)
2022-06-05 12:33:48 - INFO     | 800
2022-06-05 12:33:48 - INFO     | (200, 20)
2022-06-05 12:33:48 - INFO     | 200
In [6]:
# Class counts of the training labels (binary target: 0/1).
np.unique(y_train, return_counts=True)
Out[6]:
(array([0, 1]), array([559, 241]))
In [7]:
# Class proportions of the training labels.
# Fix: the original called `np.unique(..., return_counts=True)` twice for the same
# array; compute the counts once and normalize.
_, train_class_counts = np.unique(y_train, return_counts=True)
train_class_counts / train_class_counts.sum()
Out[7]:
array([0.69875, 0.30125])
In [8]:
# Class proportions of the test labels — should roughly match the training split.
# Fix: avoid recomputing `np.unique` for the normalizing denominator.
_, test_class_counts = np.unique(y_test, return_counts=True)
test_class_counts / test_class_counts.sum()
Out[8]:
array([0.705, 0.295])

In [9]:
# Load the hyper-parameter search (cross-validation) results saved by the experiment run.
file_name = file_path('experiment.yaml')
results = hlp.sklearn_eval.MLExperimentResults.from_yaml_file(yaml_file_name = file_name)

Hyper-Param Tuning - Cross Validation Results

Best Scores/Params

In [10]:
# Best mean cross-validation score across all trials.
log_info(f"Best Score: {results.best_score}")
2022-06-05 12:33:48 - INFO     | Best Score: 0.7668027026011365
In [11]:
# Pipeline configuration of the best-scoring trial.
log_info(f"Best Params: {results.best_params}")
2022-06-05 12:33:48 - INFO     | Best Params: {'model': 'RandomForestClassifier()', 'imputer': 'SimpleImputer()', 'scaler': 'None', 'pca': 'None', 'encoder': 'OneHotEncoder()'}
In [12]:
# Best model from each model-type.
df = results.to_formatted_dataframe(return_style=False, include_rank=True)
# Rank trials within each model type by mean roc_auc (rank 1 = best trial for that model).
df["model_rank"] = df.groupby("model")["roc_auc Mean"].rank(method="first", ascending=False)
df.query('model_rank == 1')
Out[12]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI model C max_features max_depth n_estimators min_samples_split min_samples_leaf max_samples criterion learning_rate min_child_weight subsample colsample_bytree colsample_bylevel reg_alpha reg_lambda imputer scaler pca encoder model_rank
15 1 0.77 0.72 0.81 RandomForestClassifier() NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN SimpleImputer() None None OneHotEncoder() 1.00
11 2 0.77 0.71 0.82 ExtraTreesClassifier() NaN 0.11 70.00 553.00 12.00 8.00 0.55 entropy NaN NaN NaN NaN NaN NaN NaN SimpleImputer(strategy='median') None PCA('mle') OneHotEncoder() 1.00
0 3 0.76 0.72 0.80 LogisticRegression() NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN SimpleImputer() StandardScaler() None OneHotEncoder() 1.00
7 4 0.76 0.72 0.80 LinearSVC() 0.28 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN SimpleImputer(strategy='most_frequent') MinMaxScaler() PCA('mle') OneHotEncoder() 1.00
23 10 0.75 0.69 0.81 XGBClassifier() NaN NaN 2.00 1397.00 NaN NaN NaN NaN 0.03 16.00 0.61 0.88 0.81 0.10 1.97 SimpleImputer(strategy='median') None None OneHotEncoder() 1.00
In [13]:
# Styled table of all trials; num_rows is set high enough to show every trial.
results.to_formatted_dataframe(return_style=True,
                               include_rank=True,
                               num_rows=1000)
Out[13]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI model C max_features max_depth n_estimators min_samples_split min_samples_leaf max_samples criterion learning_rate min_child_weight subsample colsample_bytree colsample_bylevel reg_alpha reg_lambda imputer scaler pca encoder
1 0.767 0.720 0.814 RandomForestClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
2 0.766 0.707 0.825 ExtraTreesClassifier() <NA> 0.114 70.000 553.000 12.000 8.000 0.548 entropy <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None PCA('mle') OneHotEncoder()
3 0.763 0.725 0.802 LogisticRegression() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() StandardScaler() None OneHotEncoder()
4 0.761 0.720 0.803 LinearSVC() 0.281 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='most_frequent') MinMaxScaler() PCA('mle') OneHotEncoder()
5 0.761 0.697 0.825 LogisticRegression() 0.001 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') MinMaxScaler() None OneHotEncoder()
6 0.757 0.711 0.803 ExtraTreesClassifier() <NA> 0.681 38.000 1,461.000 23.000 10.000 0.553 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None None CustomOrdinalEncoder()
7 0.756 0.711 0.802 RandomForestClassifier() <NA> 0.685 30.000 1,659.000 25.000 11.000 0.781 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None PCA('mle') OneHotEncoder()
8 0.753 0.716 0.791 RandomForestClassifier() <NA> 0.303 81.000 1,063.000 15.000 27.000 0.502 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None None OneHotEncoder()
9 0.752 0.698 0.805 ExtraTreesClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
10 0.751 0.695 0.808 XGBClassifier() <NA> <NA> 2.000 1,397.000 <NA> <NA> <NA> <NA> 0.031 16.000 0.608 0.881 0.812 0.096 1.971 SimpleImputer(strategy='median') None None OneHotEncoder()
11 0.751 0.713 0.789 RandomForestClassifier() <NA> 0.328 5.000 1,047.000 23.000 43.000 0.957 entropy <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None PCA('mle') OneHotEncoder()
12 0.751 0.721 0.781 LinearSVC() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() StandardScaler() None OneHotEncoder()
13 0.747 0.694 0.799 ExtraTreesClassifier() <NA> 0.710 15.000 1,493.000 33.000 27.000 0.914 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='most_frequent') None PCA('mle') OneHotEncoder()
14 0.746 0.716 0.776 LogisticRegression() 23.327 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') StandardScaler() None OneHotEncoder()
15 0.745 0.704 0.786 RandomForestClassifier() <NA> 0.762 88.000 1,235.000 8.000 7.000 0.666 gini <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') None PCA('mle') CustomOrdinalEncoder()
16 0.738 0.686 0.790 XGBClassifier() <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None None OneHotEncoder()
17 0.736 0.695 0.777 ExtraTreesClassifier() <NA> 0.740 14.000 1,645.000 5.000 43.000 0.741 entropy <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='most_frequent') None PCA('mle') CustomOrdinalEncoder()
18 0.730 0.702 0.758 LogisticRegression() 0.000 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') StandardScaler() None CustomOrdinalEncoder()
19 0.727 0.690 0.765 LinearSVC() 0.361 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') MinMaxScaler() PCA('mle') CustomOrdinalEncoder()
20 0.726 0.697 0.755 LogisticRegression() 0.000 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='median') StandardScaler() PCA('mle') CustomOrdinalEncoder()
21 0.723 0.676 0.771 XGBClassifier() <NA> <NA> 8.000 1,657.000 <NA> <NA> <NA> <NA> 0.084 5.000 0.977 0.847 0.788 0.417 1.290 SimpleImputer(strategy='median') None None OneHotEncoder()
22 0.717 0.676 0.758 XGBClassifier() <NA> <NA> 13.000 1,153.000 <NA> <NA> <NA> <NA> 0.026 3.000 0.685 0.549 0.802 0.016 2.353 SimpleImputer() None PCA('mle') CustomOrdinalEncoder()
23 0.714 0.667 0.761 XGBClassifier() <NA> <NA> 12.000 945.000 <NA> <NA> <NA> <NA> 0.156 21.000 0.990 0.851 0.658 0.071 3.523 SimpleImputer() None PCA('mle') OneHotEncoder()
24 0.701 0.669 0.733 LinearSVC() 10.021 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() MinMaxScaler() PCA('mle') CustomOrdinalEncoder()
25 0.660 0.610 0.710 LinearSVC() 0.000 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer(strategy='most_frequent') MinMaxScaler() None OneHotEncoder()
In [14]:
# Trials restricted to the random forest model, ranked by mean roc_auc.
results.to_formatted_dataframe(query='model == "RandomForestClassifier()"', include_rank=True)
Out[14]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI max_features max_depth n_estimators min_samples_split min_samples_leaf max_samples criterion imputer pca encoder
1 0.767 0.720 0.814 <NA> <NA> <NA> <NA> <NA> <NA> <NA> SimpleImputer() None OneHotEncoder()
2 0.756 0.711 0.802 0.685 30.000 1,659.000 25.000 11.000 0.781 gini SimpleImputer() PCA('mle') OneHotEncoder()
3 0.753 0.716 0.791 0.303 81.000 1,063.000 15.000 27.000 0.502 gini SimpleImputer(strategy='median') None OneHotEncoder()
4 0.751 0.713 0.789 0.328 5.000 1,047.000 23.000 43.000 0.957 entropy SimpleImputer(strategy='median') PCA('mle') OneHotEncoder()
5 0.745 0.704 0.786 0.762 88.000 1,235.000 8.000 7.000 0.666 gini SimpleImputer(strategy='median') PCA('mle') CustomOrdinalEncoder()
In [15]:
# Trials restricted to logistic regression, ranked by mean roc_auc.
results.to_formatted_dataframe(query='model == "LogisticRegression()"', include_rank=True)
Out[15]:
rank roc_auc Mean roc_auc 95CI.LO roc_auc 95CI.HI C imputer scaler pca encoder
1 0.763 0.725 0.802 <NA> SimpleImputer() StandardScaler() None OneHotEncoder()
2 0.761 0.697 0.825 0.001 SimpleImputer(strategy='median') MinMaxScaler() None OneHotEncoder()
3 0.746 0.716 0.776 23.327 SimpleImputer(strategy='median') StandardScaler() None OneHotEncoder()
4 0.730 0.702 0.758 0.000 SimpleImputer(strategy='median') StandardScaler() None CustomOrdinalEncoder()
5 0.726 0.697 0.755 0.000 SimpleImputer(strategy='median') StandardScaler() PCA('mle') CustomOrdinalEncoder()

BayesSearchCV Performance Over Time

In [16]:
# Score across search trials, faceted by model type, to inspect search convergence per model.
results.plot_performance_across_trials(facet_by='model').show()
In [17]:
# Same view, zoomed into the random forest trials only.
results.plot_performance_across_trials(query='model == "RandomForestClassifier()"').show()

Variable Performance Over Time

In [18]:
# Hyper-parameter values sampled over time, to see where the search concentrated.
results.plot_parameter_values_across_trials(query='model == "RandomForestClassifier()"').show()

Scatter Matrix

In [19]:
# Disabled: large/expensive to render. Uncomment to inspect pairwise
# hyper-parameter relationships for the random forest trials.
# results.plot_scatter_matrix(query='model == "RandomForestClassifier()"',
#                             height=1000, width=1000).show()

Variable Performance - Numeric

In [20]:
# Score vs. each numeric hyper-parameter for the random forest trials.
results.plot_performance_numeric_params(query='model == "RandomForestClassifier()"',
                                        height=800)
In [21]:
# Parallel-coordinates view of hyper-parameter combinations vs. score.
results.plot_parallel_coordinates(query='model == "RandomForestClassifier()"').show()

Variable Performance - Non-Numeric

In [22]:
# Score vs. each categorical hyper-parameter (imputer, scaler, pca, encoder).
results.plot_performance_non_numeric_params(query='model == "RandomForestClassifier()"').show()

In [23]:
# Score vs. max_features, with point size encoding max_depth and color encoding the encoder.
results.plot_score_vs_parameter(
    query='model == "RandomForestClassifier()"',
    parameter='max_features',
    size='max_depth',
    color='encoder',
)

In [24]:
# Disabled exploratory plot: colsample_bytree vs. learning_rate for the XGBoost trials,
# with point size encoding max_depth.
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='max_depth'
# )
In [25]:
# Disabled variant of the plot above, sizing points by the imputer instead of max_depth.
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='imputer'
# )

Best Model - Test Set Performance

In [26]:
# Load the fitted best estimator saved by the experiment run
# (used below to score the held-out test set).
file_name = file_path('experiment_best_estimator.pkl')
best_estimator = hlp.utility.read_pickle(file_name)
In [27]:
# NOTE(review): this file was already loaded above as `X_test`; re-loading it here into
# lowercase `x_test` is redundant and the name inconsistency invites hidden-state bugs.
x_test = pd.read_pickle(file_path('x_test.pkl'))
x_test.head()
Out[27]:
checking_status duration credit_history purpose credit_amount savings_status employment installment_commitment personal_status other_parties residence_since property_magnitude age other_payment_plans housing existing_credits job num_dependents own_telephone foreign_worker
521 <0 18.00 existing paid radio/tv 3190.00 <100 1<=X<4 2.00 female div/dep/mar none 2.00 real estate 24.00 none own 1.00 skilled 1.00 none yes
737 <0 18.00 existing paid new car 4380.00 100<=X<500 1<=X<4 3.00 male single none 4.00 car 35.00 none own 1.00 unskilled resident 2.00 yes yes
740 <0 24.00 all paid new car 2325.00 100<=X<500 4<=X<7 2.00 male single none 3.00 car 32.00 bank own 1.00 skilled 1.00 none yes
660 >=200 12.00 existing paid radio/tv 1297.00 <100 1<=X<4 3.00 male mar/wid none 4.00 real estate 23.00 none rent 1.00 skilled 1.00 none yes
411 no checking 33.00 critical/other existing credit used car 7253.00 <100 4<=X<7 3.00 male single none 2.00 car 35.00 none own 2.00 high qualif/self emp/mgmt 1.00 yes yes
In [28]:
# NOTE(review): `y_test` was already loaded in the data-loading cell; this re-load
# (via a different helper) is redundant — consider reusing the existing variable.
y_test = hlp.utility.read_pickle(file_path('y_test.pkl'))
y_test[0:10]
Out[28]:
array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
In [29]:
# Predicted probability of the positive class (column 1 of predict_proba).
test_predictions = best_estimator.predict_proba(x_test)[:, 1]
test_predictions[0:10]
Out[29]:
array([0.402, 0.494, 0.722, 0.374, 0.056, 0.476, 0.084, 0.476, 0.18 ,
       0.232])
In [30]:
# Evaluate the best estimator on the held-out test set.
# NOTE(review): score_threshold=0.37 is a magic number — document how it was chosen
# (e.g. from the threshold/trade-off curves further below).
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=0.37
)
In [31]:
# Distribution of predicted scores, split by actual class.
evaluator.plot_actual_vs_predict_histogram()
In [32]:
# Confusion matrix at the chosen score threshold.
evaluator.plot_confusion_matrix()
In [33]:
# Full metrics table, compared against two dummy baselines (majority prior / constant positive).
evaluator.all_metrics_df(return_style=True,
                         dummy_classifier_strategy=['prior', 'constant'],
                         round_by=3)
Out[33]:
  Score Dummy (prior) Dummy (constant) Explanation
AUC 0.825 0.500 0.500 Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier)
True Positive Rate 0.746 0.000 1.000 74.6% of positive instances were correctly identified.; i.e. 44 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall
True Negative Rate 0.801 1.000 0.000 80.1% of negative instances were correctly identified.; i.e. 113 "Negative Class" labels were correctly identified out of 141 instances
False Positive Rate 0.199 0.000 1.000 19.9% of negative instances were incorrectly identified as positive; i.e. 28 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances
False Negative Rate 0.254 1.000 0.000 25.4% of positive instances were incorrectly identified as negative; i.e. 15 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances
Positive Predictive Value 0.611 0.000 0.295 When the model claims an instance is positive, it is correct 61.1% of the time; i.e. out of the 72 times the model predicted "Positive Class", it was correct 44 times; a.k.a precision
Negative Predictive Value 0.883 0.705 0.000 When the model claims an instance is negative, it is correct 88.3% of the time; i.e. out of the 128 times the model predicted "Negative Class", it was correct 113 times
F1 Score 0.672 0.000 0.456 The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0.
Precision/Recall AUC 0.667 0.295 0.295 Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats.
Accuracy 0.785 0.705 0.295 78.5% of instances were correctly identified
Error Rate 0.215 0.295 0.705 21.5% of instances were incorrectly identified
% Positive 0.295 0.295 0.295 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class"
Total Observations 200 200 200 There are 200 total observations; i.e. sample size
In [34]:
# ROC curve on the test set.
evaluator.plot_roc_auc_curve().show()
<Figure size 720x444.984 with 0 Axes>
In [35]:
# Precision/recall curve on the test set.
evaluator.plot_precision_recall_auc_curve().show()
In [36]:
# Metric values as the score threshold varies — useful for justifying the 0.37 threshold above.
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()
In [37]:
# Precision vs. recall trade-off across candidate thresholds.
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()
In [38]:
# Cumulative gain and lift by score percentile (e.g. top 10% of scores captures 24% of positives).
evaluator.calculate_lift_gain(return_style=True)
Out[38]:
  Gain Lift
Percentile    
5 0.14 2.71
10 0.24 2.37
15 0.37 2.49
20 0.49 2.46
25 0.56 2.24
30 0.64 2.15
35 0.75 2.13
40 0.76 1.91
45 0.80 1.77
50 0.83 1.66
55 0.85 1.54
60 0.88 1.47
65 0.88 1.36
70 0.93 1.33
75 0.95 1.27
80 0.97 1.21
85 0.98 1.16
90 1.00 1.11
95 1.00 1.05
100 1.00 1.00